# -*- coding: utf-8 -*-

# Scrapy settings for pangolin project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     http://doc.scrapy.org/en/latest/topics/settings.html
#     http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
#     http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
from logging.handlers import TimedRotatingFileHandler

import logging
import MySQLdb.cursors
from twisted.enterprise import adbapi

BOT_NAME = 'pangolin'

SPIDER_MODULES = ['pangolin.spiders']
NEWSPIDER_MODULE = 'pangolin.spiders'


# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'pangolin (+http://www.yourdomain.com)'

# robots.txt is deliberately ignored so every target page can be fetched.
ROBOTSTXT_OBEY = False

# Per-site politeness delay, in seconds.  With randomization enabled the
# actual wait is a uniform value in [0.5 * delay, 1.5 * delay], i.e. a
# delay of 2 yields waits between 1 and 3 seconds.
DOWNLOAD_DELAY = 2
RANDOMIZE_DOWNLOAD_DELAY = True
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Cookies are disabled so sessions cannot be correlated across requests.
COOKIES_ENABLED = False

# Splash (JavaScript rendering) service endpoint used by scrapy-splash.
SPLASH_URL = 'http://10.200.2.104:8050'

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
#}

# Spider middlewares (lower number = runs first).  The scrapy-splash
# dedup-args middleware must be installed alongside the project middleware.
# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
SPIDER_MIDDLEWARES = {
    'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
    'pangolin.middlewares.PangolinSpiderMiddleware': 200,
}

# Enable or disable downloader middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
#    'pangolin.middlewares.MyCustomDownloaderMiddleware': 543,
#}

# Enable or disable extensions
# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Enable and configure the AutoThrottle extension (disabled by default)
# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# Retry configuration.  Proxies fail often, so retry generously and treat a
# wide range of HTTP status codes as retryable — including 4xx codes that,
# behind a flaky proxy, usually signal blocking rather than a missing page.
RETRY_ENABLED = True
RETRY_TIMES = 5
RETRY_HTTP_CODES = [500, 502, 503, 504, 526, 596, 400, 403, 404, 408, 429]

# Abort any single download that takes longer than this many seconds.
DOWNLOAD_TIMEOUT = 60

# Item pipelines (lower number = runs earlier).
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    # 'pangolin.pipelines.PangolinJosnPipeline': 301,
    'pangolin.pipelines.PangolinPipeline': 300,
}

# Downloader middlewares (lower order = closer to the engine).  The stock
# HttpProxyMiddleware and UserAgentMiddleware are disabled (None) and
# replaced by the project's own proxy-rotation and UA-rotation middlewares.
# NOTE(review): the scrapy-splash README recommends priorities 723/725 for
# SplashCookiesMiddleware/SplashMiddleware; 90/91 places them unusually
# early in the chain — confirm this ordering is intentional before changing.
DOWNLOADER_MIDDLEWARES = {
    # 'scrapy.downloadermiddlewares.retry.RetryMiddleware': None,
    # 'pangolin.middlewares.RetryMiddleware': 90,
    'scrapy.downloadermiddlewares.retry.RetryMiddleware': 80,
    'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': None,
    'pangolin.middlewares.ProxyMiddleware': 81,
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    'pangolin.middlewares.RotateUserAgentMiddleware': 83,
    'pangolin.middlewares.JavaScriptMiddleware': 401,
    'scrapy_splash.SplashCookiesMiddleware': 90,
    'scrapy_splash.SplashMiddleware': 91,
    'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,

}
# Splash-aware request fingerprinting and HTTP-cache storage (scrapy-splash).
DUPEFILTER_CLASS = 'scrapy_splash.SplashAwareDupeFilter'
HTTPCACHE_STORAGE = 'scrapy_splash.SplashAwareFSCacheStorage'
# Write exported feeds as UTF-8 instead of the default ASCII-escaped output.
FEED_EXPORT_ENCODING = 'utf-8'

# Database configuration (MySQL, consumed by the adbapi pool below).
# SECURITY NOTE(review): credentials are hard-coded in source; consider
# moving them to environment variables or an untracked local settings file.
DB_CONF = {
    'host': '192.168.0.64',
    'db': 'spider',
    'user':  'root',
    'passwd': '111111',
    'port':  3306,
    # Rows come back as dicts keyed by column name instead of tuples.
    'cursorclass':  MySQLdb.cursors.DictCursor,
    'charset':  'utf8',
    # Result strings stay as bytes rather than being decoded to unicode.
    'use_unicode':  False
}

# Module-level Twisted ADBAPI connection pool shared by the pipelines.
# All connection parameters come straight from DB_CONF; cp_reconnect=True
# makes the pool transparently re-open dropped connections.
db_conn = adbapi.ConnectionPool('MySQLdb', cp_reconnect=True, **DB_CONF)

# File (image) server reached over SFTP; screenshots are uploaded from
# local_dir to remote_dir.
# SECURITY NOTE(review): credentials are hard-coded in source.
FILE_SERVER_CONF = dict(
    host='10.200.2.104',
    port=22,
    user_name='lvtu_admin',
    passwd='JEIZGHMcoHfmfsHV',
    local_dir='/home/lvtu_admin/project/pangolin/pangolin/screenshot',
    remote_dir='/home/lvtu_admin/project/image',
)

# Log file path.  NOTE: when running locally, use a plain (non-rotating)
# FileHandler to avoid WIN32 errors on Windows.
LOG_FILE = "log/spider_log.log"
# Previously-used explicit logging setup, kept for reference:
# level = logging.INFO
# logger = logging.getLogger()
# fileTimeHandler = logging.FileHandler(LOG_FILE)  # plain, non-rotating
# Rotate by size:
# fileTimeHandler = logging.handlers.RotatingFileHandler(LOG_FILE, maxBytes=1024*1024, backupCount=40)
# Rotate by time (when: S=seconds, M=minutes, H=hours, D=days, W=weeks, midnight):
# fileTimeHandler = logging.handlers.TimedRotatingFileHandler(LOG_FILE, when='D', interval=1, backupCount=30)
# formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
# fileTimeHandler.setFormatter(formatter)
# logger.addHandler(fileTimeHandler)
# logger.setLevel(logging.NOTSET)  # minimum level written to the log

# Base URL of the internal server where scraped artifacts are posted.
NG_URL = 'http://rhino.lvmama.com/scrapy/'

# Feature switches.  These are the string literals 'TRUE' / 'FALSE', not
# booleans — consumers presumably compare against the literal text, so they
# are kept as strings (verify before converting to bool).
# Persist results to the database / upload to the file server.
IS_SAVE_DATABASE = 'TRUE'
IS_SAVE_SERVER = 'TRUE'
# Take page screenshots.
IS_SNAPSHOT = 'TRUE'
# Route requests through rotating proxy IPs.
IS_ADD_PROXY_IP = 'TRUE'

# Spiders whose target pages need dynamic (JavaScript) rendering.
DYNAMIC_RENDERING_SPIDER = [
    "tc_ticket_list_spider",
    "tc_ticket_detail_spider",
    # "tc_ticket_by_url_spider",
    "qnr_ticket_list_spider_test",
    "qnr_ticket_detail_by_scenery_spider",
    "qnr_ticket_by_scenery_spider",
]
